{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import re\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import collections\n",
    "import matplotlib.pyplot as plt\n",
    "import seaborn as sns\n",
    "import pickle\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# helper functions "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "class Experiment:\n",
    "    def __init__(self, recommender=\"PMF\", evaluator=\"Recall\", user_per=0.0, keep_days=0, hist=\"2014_1\"):\n",
    "        self.recommender = recommender\n",
    "        self.evaluator = evaluator\n",
    "        self.user_per = user_per\n",
    "        self.keep_days = keep_days\n",
    "        self.hist = hist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def flatten(x):\n",
    "    if isinstance(x, collections.Iterable):\n",
    "        return [a for i in x for a in flatten(i)]\n",
    "    else:\n",
    "        return [x]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_valid_folders(in_dir, exp):\n",
    "    files = os.listdir(in_dir)\n",
    "    folders = []\n",
    "    for f in files:\n",
    "        #print(f)\n",
    "        if (exp.recommender in f)  and f.endswith(exp.hist) and \"{}_{}\".format(exp.user_per, exp.keep_days) in f:\n",
    "            folders.append(in_dir + f)\n",
    "    return sorted(folders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_test_folders(exp):\n",
    "    test_dir = \"../movielens_test_logs/\"\n",
    "    files = os.listdir(test_dir)\n",
    "    folders = []\n",
    "    for f in files:\n",
    "        if (exp.recommender in f) and (exp.evaluator in f) and f.endswith(exp.hist) and \"{}_{}\".format(exp.user_per, exp.keep_days) in f:\n",
    "            folders.append(test_dir + f)\n",
    "    return sorted(folders)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_eva(folder, evaluator=\"Recall\", eval_name=None): \n",
    "    f_ = '\\\\d+\\\\.\\\\d+'\n",
    "    if \"@\" in evaluator:\n",
    "        evaluator, at = evaluator.split(\"@\")\n",
    "        at = int(at)//10-1\n",
    "    else:\n",
    "        at = 0\n",
    "    log_files = [os.path.join(folder, f) for f in sorted(os.listdir(folder))]\n",
    "    results = []\n",
    "    for f in log_files:\n",
    "        if os.path.isdir(f): f = f + '/training.log'\n",
    "        with open(f, 'r') as in_file:\n",
    "            lines = in_file.readlines()\n",
    "            val_r = []\n",
    "            for l in lines:\n",
    "                if evaluator in l:\n",
    "                    if eval_name in l: val_r.append(list(map(float, re.findall(f_, l)))[at])\n",
    "        if len(val_r) == 0:\n",
    "            print (f)\n",
    "        results.append(val_r)\n",
    "    return np.array(results)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<a id='config'></a>"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_test_config(val_dir, exp):\n",
    "    display_itr = 1000 # result outputs after every 1000 iterations, change this according to variable used in 'time_validation.py'  \n",
    "    folders = get_valid_folders(val_dir, exp)\n",
    "    res = np.array([get_eva(folder, evaluator=exp.evaluator, eval_name=\"Val\") for folder in folders])\n",
    "    max_epoch = np.max(res, axis=2)\n",
    "    max_l2 = np.argmax(max_epoch, axis=0)\n",
    "    train_config = folders[flatten(max_l2)[0]].split('/')[-1].split('_')\n",
    "    training_itr = (flatten(np.argmax(res, axis=2)[max_l2])[0]+1) * display_itr \n",
    "    test_config = train_config[:-2] + [str(training_itr), \"_\".join(train_config[-2:])]\n",
    "    return test_config"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def save_test_config(recommender, evaluator):\n",
    "    \n",
    "    with open(\"../configs/{}_{}_test_config.txt\".format(recommender, evaluator), 'w') as outfile:\n",
    "        # baseline: no-filtering\n",
    "        exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=0.0, keep_days=0)\n",
    "        test_config = generate_test_config(val_dir, exp)\n",
    "        outfile.write(\" \".join(test_config) + \"\\n\")\n",
    "\n",
    "        # with filter \n",
    "        for user_per in [0.25, 0.5, 0.75, 1.0]:\n",
    "            for keep_days in [1,7,14,30,60,90,180]:\n",
    "                exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=user_per, keep_days=keep_days)\n",
    "                test_config = generate_test_config(val_dir, exp)\n",
    "                outfile.write(\" \".join(test_config) + \"\\n\")\n",
    "\n",
    "        # complete user records for time intervals of varying length in multiples of 6 months\n",
    "        for year in range(2010, 2014):\n",
    "            for month in [1,7]:\n",
    "                hist = \"{}_{}\".format(year, month)\n",
    "                exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=0.0, keep_days=0, hist=hist)\n",
    "                test_config = generate_test_config(val_dir, exp)\n",
    "                outfile.write(\" \".join(test_config) + \"\\n\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Model configuration"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "val_dir = \"../movielens_validation_logs/\" # change to your log files directory\n",
    "for recommender in ['PMF', 'BPR', 'CML']:\n",
    "    for evaluator in ['Recall','NDCG']:\n",
    "        save_test_config(recommender, evaluator)"
   ]
  },
  {
   "cell_type": "raw",
   "metadata": {},
   "source": [
    "Note: this shoud generate model configurations in \"./configs/\" under the project folder."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Experiments"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Population-level performance "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_test_results(exp):\n",
    "    folders = get_test_folders(exp)\n",
    "    results = get_eva(folders[0], evaluator=exp.evaluator, eval_name=\"Test\")\n",
    "    return np.concatenate(results)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def generate_df(recommender=\"PMF\", evaluator=\"Recall\"):\n",
    "    df = pd.DataFrame(columns=[\"user_per\", \"keep_days\", \"recommender\", \"evaluator\", \"result\"])\n",
    "    row = 0\n",
    "    exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=0.0, keep_days=0)\n",
    "    baseline_results = get_test_results(exp)\n",
    "    df.loc[row] = [0.0, 0, recommender, evaluator, np.mean(baseline_results)]\n",
    "    row += 1\n",
    "\n",
    "    for user_per in [0.25, 0.5, 0.75, 1.0]:\n",
    "        for keep_days in [1, 7, 14, 30, 60, 90, 180]:\n",
    "            exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=user_per, keep_days=keep_days)\n",
    "            results = get_test_results(exp)\n",
    "            df.loc[row] = [user_per, keep_days, recommender, evaluator, np.mean(results)]\n",
    "            row += 1        \n",
    "    return df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def plot_population():\n",
    "    fig, axn = plt.subplots(2, 3, sharex=True, sharey=True)\n",
    "    fig.set_size_inches(12,5)\n",
    "    fig.tight_layout(rect=[0, 0.1, 1, 1])\n",
    "    \n",
    "    recommenders = [\"CML\", \"BPR\", \"PMF\"]\n",
    "    evaluators = [\"Recall\", \"NDCG\"]\n",
    "    for i, ax in enumerate(axn.flat):\n",
    "        recommender = recommenders[i%3]\n",
    "        evaluator = evaluators[i//3]\n",
    "        df = generate_df(recommender=recommender, evaluator=evaluator)\n",
    "        baseline = df[\"result\"][0]\n",
    "        df[\"result_mean\"] = df[\"result\"].apply(lambda x: (x-baseline)*100/baseline)\n",
    "        plot_df = df.iloc[1:,]\n",
    "        plot_df[\"user_per\"] = plot_df[\"user_per\"].apply(lambda x: \"P={}\".format(x))\n",
    "\n",
    "        sns.pointplot(data=plot_df, ax=ax, x=\"keep_days\", y=\"result\", hue=\"user_per\", linestyles='--', palette=\"tab20\")\n",
    "        \n",
    "        plt.subplots_adjust(hspace = .1, wspace=.1)\n",
    "        ax.set_xlabel(\"\")\n",
    "        ax.set_ylabel(\"\")\n",
    "        ax.set_ylim(-45, 20)\n",
    "        ax.axhline(y=0, color='k', linestyle=\"--\", alpha=0.5)\n",
    "        ax.legend(bbox_to_anchor=(-0.05, -0.2, 1.1, -0.1), mode=\"expand\", ncol=4).set_visible(i==4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# show population-level performance change under different settings (P,N)\n",
    "plot_population()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Compare recommenders on complete user records for time intervals of varying length"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def truncate_history(recommender, evaluator):    \n",
    "    df = pd.DataFrame(columns=[\"recommender\", \"evaluator\", \"history_length\", \"result\"])\n",
    "    row = 0\n",
    "    hist = \"{}_{}\".format(2014, 1)\n",
    "    exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=0.0, keep_days=0, hist=hist)\n",
    "    result = get_test_results(exp)\n",
    "    df.loc[row] = [recommender, evaluator, hist, np.mean(result)]\n",
    "    row += 1\n",
    "    for year in range(2010, 2014):\n",
    "        for month in [1,7]:\n",
    "            hist = \"{}_{}\".format(year, month)\n",
    "            exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=0.0, keep_days=0, hist=hist)\n",
    "            result = get_test_results(exp)\n",
    "            df.loc[row] = [recommender, evaluator, hist, np.mean(result)]\n",
    "            row += 1\n",
    "    return(df)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "truncate_history(\"PMF\", \"Recall\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "truncate_history(\"BPR\", \"NDCG\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "truncate_history(\"CML\", \"Recall\")"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Group-level peroformance"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from openrec.utils.evaluators import Recall, NDCG"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def user_scores(p):\n",
    "    recall_evaluator = Recall(recall_at=[10])\n",
    "    ndcg_evaluator = NDCG(ndcg_at=[10])\n",
    "\n",
    "    score_per_user = dict()\n",
    "    count_per_user = dict()\n",
    "\n",
    "    for user in p['users']:\n",
    "        neg_scores = p['results'][user][:p['num_negatives']]\n",
    "        for i in range(len(p['user_items'][user][p['num_negatives']:])):\n",
    "            pos_score = p['results'][user][p['num_negatives'] + i]\n",
    "            rank_above = np.array([ float(np.sum(neg_scores > pos_score)) ])\n",
    "            negative_num = float(p['num_negatives'])\n",
    "            curr_score_recall = recall_evaluator.compute(rank_above, negative_num)[0]\n",
    "            curr_score_ndcg = ndcg_evaluator.compute(rank_above, negative_num)[0]\n",
    "            if user not in score_per_user:\n",
    "                score_per_user[user] = list()\n",
    "            if user not in count_per_user:\n",
    "                count_per_user[user] = 0.0\n",
    "            score_per_user[user].append((curr_score_recall, curr_score_ndcg))\n",
    "            count_per_user[user] += 1\n",
    "\n",
    "    # calculate per-user scores\n",
    "    per_user_recall = dict()\n",
    "    per_user_ndcg = dict()\n",
    "\n",
    "    for key in score_per_user.keys():\n",
    "        curr_recall = 0.0\n",
    "        curr_ndcg = 0.0\n",
    "        for tup in score_per_user[key]:\n",
    "            curr_recall += tup[0]\n",
    "            curr_ndcg += tup[1]\n",
    "        per_user_recall[ key ] = curr_recall / count_per_user[key]\n",
    "        per_user_ndcg[ key ] = curr_ndcg / count_per_user[key]\n",
    "        \n",
    "    return {\"Recall\":per_user_recall,\n",
    "            \"NDCG\": per_user_ndcg\n",
    "           }"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def user_grouping(logdir):\n",
    "    \n",
    "    with open(logdir+\"/filtered_data.npy\", 'rb') as filter_profile:\n",
    "        filter_data = np.load(filter_profile)\n",
    "\n",
    "    with open(logdir+\"/train_data.npy\", 'rb') as train_profile:\n",
    "        train_data = np.load(train_profile)\n",
    "        \n",
    "    with open(logdir+\"/test_data.npy\", 'rb') as test_profile:\n",
    "        test_data = np.load(test_profile)\n",
    "    \n",
    "    filter_user = np.unique(filter_data[\"user_id\"])\n",
    "    train_user = np.unique(train_data[\"user_id\"])\n",
    "    test_user = np.unique(test_data[\"user_id\"])\n",
    "    \n",
    "    # decompose test users into three groups: (1)user profile changed (2) user profile unchanged (3) new users (cold start)\n",
    "    group_filter = [i for i in test_user if i in filter_user]\n",
    "    group_same = [i for i in test_user if i in (set(train_user) - set(filter_user))]\n",
    "    group_new = [i for i in test_user if i not in (set(train_user) | set(filter_user))]\n",
    "    return [len(train_data), len(train_user), len(test_data), group_filter, group_same, group_new]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def user_compare(recommender=None, evaluator=None):\n",
    "    baseline = Experiment(recommender=recommender, evaluator=evaluator, user_per=0.0, keep_days=0)\n",
    "    baseline_folder = get_test_folders(baseline)[0]\n",
    "    baseline_files = [os.path.join(baseline_folder, f) for f in sorted(os.listdir(baseline_folder))]\n",
    "    \n",
    "    df = pd.DataFrame(columns=[\"group\", \"user_per\", \"keep_days\", \"recommender\", \"evaluator\", \"result_mean\", \"baseline_result_mean\", \"performance_change\"])\n",
    "    row = 0\n",
    "    \n",
    "    for user_per in [0.25, 0.5, 0.75, 1.0]:\n",
    "        for keep_days in [1,7,14,30,60,90,180]:\n",
    "            print (user_per, keep_days)\n",
    "            exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=user_per, keep_days=keep_days)\n",
    "            folder = get_test_folders(exp)[0]\n",
    "            log_files = [os.path.join(folder, f) for f in sorted(os.listdir(folder))]\n",
    "\n",
    "            filter_users = []\n",
    "            unfilter_users = []\n",
    "            new_users = []\n",
    "\n",
    "            base_filter_users = []\n",
    "            base_unfilter_users = []\n",
    "            base_new_users = []\n",
    "\n",
    "            for i in range(len(log_files)):\n",
    "            \n",
    "                #print (\"###with user filtering###\") \n",
    "                with open(log_files[i]+\"/_evaluate_partial.pickle\", 'rb') as eva_file:\n",
    "                    p = pickle.load(eva_file)\n",
    "                per_user_performance = user_scores(p)[evaluator]\n",
    "                \n",
    "                group_filter, group_same, group_new = user_grouping(log_files[i])[-3:]\n",
    "                #print (len(group_filter), len(group_same), len(group_new))\n",
    "                assert len(per_user_performance) == len(group_filter) + len(group_same) + len(group_new)\n",
    "                \n",
    "                after_group_filter = [per_user_performance[u] for u in group_filter]\n",
    "                after_group_same = [per_user_performance[u] for u in group_same]\n",
    "                after_group_new = [per_user_performance[u] for u in group_new]\n",
    "\n",
    "                filter_users.append(np.mean(after_group_filter))\n",
    "                unfilter_users.append(np.mean(after_group_same))\n",
    "                new_users.append(np.mean(after_group_new))\n",
    "                \n",
    "                #print (\"###baseline###\")    \n",
    "                with open(baseline_files[i]+\"/_evaluate_partial.pickle\", 'rb') as eva_file:\n",
    "                    p = pickle.load(eva_file)\n",
    "                per_user_performance = user_scores(p)[evaluator]\n",
    "                \n",
    "                baseline_group_filter = [per_user_performance[u] for u in group_filter]\n",
    "                baseline_group_same = [per_user_performance[u] for u in group_same]\n",
    "                baseline_group_new = [per_user_performance[u] for u in group_new]\n",
    "            \n",
    "                base_filter_users.append(np.mean(baseline_group_filter))\n",
    "                base_unfilter_users.append(np.mean(baseline_group_same))\n",
    "                base_new_users.append(np.mean(baseline_group_new))\n",
    "            \n",
    "            df.loc[row] = [\"filtered\", user_per, keep_days, recommender, evaluator, \n",
    "                           np.mean(filter_users), np.mean(base_filter_users),\n",
    "                          (np.mean(filter_users) - np.mean(base_filter_users))/np.mean(base_filter_users)]\n",
    "            row +=1\n",
    "            \n",
    "            df.loc[row] = [\"no filtered\", user_per, keep_days, recommender, evaluator, \n",
    "                           np.mean(unfilter_users), np.mean(base_unfilter_users),\n",
    "                          (np.mean(unfilter_users) - np.mean(base_unfilter_users))/np.mean(base_unfilter_users)]\n",
    "            row +=1\n",
    "            \n",
    "            df.loc[row] = [\"cold start\", user_per, keep_days, recommender, evaluator, \n",
    "                           np.mean(new_users), np.mean(base_new_users),\n",
    "                          (np.mean(new_users) - np.mean(base_new_users))/np.mean(base_new_users)]\n",
    "            row +=1\n",
    "            \n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### User group distribution"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "# use any recommender and evaluator \n",
    "recommender = \"PMF\" \n",
    "evaluator = \"NDCG\"\n",
    "df = pd.DataFrame(columns=[\"group\", \"user_per\", \"keep_days\", \"user_num\", \"percentage\"])\n",
    "row = 0\n",
    "for user_per in [0.25, 0.5, 0.75, 1.0]:\n",
    "    for keep_days in [1,7,14,30,60,90,180]:\n",
    "        exp = Experiment(recommender=recommender, evaluator=evaluator, user_per=user_per, keep_days=keep_days)\n",
    "        folder = get_test_folders(exp)[0]\n",
    "        log_files = [os.path.join(folder, f) for f in sorted(os.listdir(folder))]\n",
    "        stats = []\n",
    "        for i in range(len(log_files)):\n",
    "            stats.append(user_grouping(log_files[i]))\n",
    "        groups = np.mean(stats, axis=0)\n",
    "        df.loc[row] = [\"Group 1\", user_per, keep_days, len(groups[-3]), len(groups[-3])/len(groups[-4])]\n",
    "        row += 1\n",
    "        df.loc[row] = [\"Group 2\", user_per, keep_days, len(groups[-2]), len(groups[-2])/len(groups[-4])]\n",
    "        row +=1\n",
    "        df.loc[row] = [\"Group 3\", user_per, keep_days, len(groups[-1]), len(groups[-1])/len(groups[-4])]\n",
    "        row += 1\n",
    "        \n",
    "fig, axn = plt.subplots(1, 4, sharex=True, sharey=True)\n",
    "fig.set_size_inches(15,3)\n",
    "fig.tight_layout(rect=[0, 0.2, 1, 1])\n",
    "user_pers = [0.25, 0.5, 0.75, 1.0]\n",
    "days = [\"1\",\"7\",\"14\",\"30\",\"60\",\"90\",\"180\"]\n",
    "for i, ax in enumerate(axn.flat):\n",
    "    user_per = user_pers[i%4]\n",
    "    plot_df = df[df[\"user_per\"] == user_per]\n",
    "    group1 = plot_df[plot_df[\"group\"] == \"Group 1\"][\"percentage\"]\n",
    "    group2 = plot_df[plot_df[\"group\"] == \"Group 2\"][\"percentage\"]\n",
    "    group3 = plot_df[plot_df[\"group\"] == \"Group 3\"][\"percentage\"]\n",
    "    ax.bar(days, group1, label=\"Group 1\",alpha=0.8)\n",
    "    ax.bar(days, group2, bottom=group1, label=\"Group 2\", alpha=0.8)\n",
    "    ax.bar(days, group3, bottom=np.array(group2) + np.array(group1), label=\"Group 3\", alpha=0.8)\n",
    "    ax.legend(bbox_to_anchor=(-0.65, -0.2, 1.1, -0.1), mode=\"expand\", ncol=3).set_visible(i==2)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Group-level performance change after data filtering "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "def plot_grouptrend(evaluator):\n",
    "    fig, axn = plt.subplots(3, 4, sharex=True, sharey=True)\n",
    "    fig.set_size_inches(12,6)\n",
    "    fig.tight_layout(rect=[0, 0.1, 1, 1])\n",
    "    recommenders = [\"CML\", \"BPR\", \"PMF\"]\n",
    "    user_pers = [0.25, 0.5, 0.75, 1.0]\n",
    "    legend_mapping = {\"filtered\": \"Group 1\", \"no filtered\":\"Group 2\", \"cold start\": \"Group 3\"}\n",
    "    \n",
    "    for i, ax in enumerate(axn.flat):\n",
    "        recommender = recommenders[i//4]\n",
    "        user_per = user_pers[i%4]\n",
    "        group_df = user_compare(recommender=recommender, evaluator=evaluator)\n",
    "        plot_df = group_df[(group_df[\"user_per\"] == user_per)]\n",
    "        plot_df[\"performance_change\"] = group_df[\"performance_change\"].apply(lambda x: x*100)\n",
    "        plot_df[\"group\"] = group_df[\"group\"].apply(lambda x: legend_mapping[x])\n",
    "        # sns.set(font_scale=1.1)\n",
    "        sns.pointplot(data=plot_df, ax=ax, x=\"keep_days\", y=\"performance_change\", hue=\"group\", linestyles='--', palette=\"tab10\")\n",
    "        ax.set_ylim(-45, 20)\n",
    "        ax.axhline(y=0, color='k', linestyle=\"--\", alpha=0.5)\n",
    "        plt.subplots_adjust(hspace = .1, wspace=.1)\n",
    "        ax.set_xlabel(\"\")\n",
    "        ax.set_ylabel(\"\")\n",
    "        ax.legend(bbox_to_anchor=(-0.75, -0.15, 1.5, -0.2), mode=\"expand\", ncol=4).set_visible(i==10)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_grouptrend(\"Recall\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plot_grouptrend(\"NDCG\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
